packages = c('readxl', 'datawizard', 'crosstalk', 'tidyr', 'lubridate','tidyverse', 'plotly', 'd3scatter','tidyquant', 'ggbraid', 'ggTimeSeries', 'CGPfunctions')
for(p in packages){
if(!require(p,character.only = T)){
install.packages(p)
}
library(p,character.only = T)
}1 The task
In this take-home exercise, you are required to uncover the impact of COVID-19 as well as the global economic and political dynamic in 2022 on Singapore bi-lateral trade (i.e. Import, Export and Trade Balance) by using appropriate analytical visualisation techniques learned in Lesson 6: It’s About Time. Students are encouraged to apply appropriate interactive techniques to enhance user and data discovery experiences.
The write-up of the take-home exercise should include but not limited to the followings:
- Describe the selection and designed consideration of the analytical data visualisation used. The discussion should limit to not more than 150 words each.
- A reproducible description of the procedures used to prepare the analytical visualisation. Please refer to the peer submission I shared.
- A write-up of not more than 100 words to discuss the patterns reveal by each analytical visualization prepared.
Packages
2 Data
Merchandise Trade provided by Department of Statistics, Singapore (DOS) is used. The study period is between January 2020 to December 2022.
Checking the number of sheets it contains
excel_sheets("data/data.xlsx")[1] "Content" "T1" "T2"
Importing data
In the code chunk below, read_xlsx() of readxl package is used to import the data worksheet of our data workbook into R.
T1 <- read_xlsx("data/data.xlsx", sheet = "T1")
T2 <- read_xlsx("data/data.xlsx", sheet = "T2")Formatting data
# Transpose the fat table to long table
T1 <- gather(T1, "MonthYear", "ImportValue", -`Data Series`)
T2 <- gather(T2, "MonthYear", "ExportValue", -`Data Series`)Note
ymd_hms() and hour() are from lubridate package
# Convert MonthYear column to date format
T1$`MonthYear` <- ym(T1$`MonthYear`)
T2$`MonthYear` <- ym(T2$`MonthYear`)
# Convert ImportValue column to numeric format
T1$`ImportValue` <- as.numeric(T1$`ImportValue`)
T2$`ExportValue` <- as.numeric(T2$`ExportValue`)Separate region and country
Code
# =================== Import =================== #
Region <- T1 %>%
filter(grepl('Million', `Data Series`)) %>%
rename("Region" = "ImportValue")
Country <- T1 %>%
filter(grepl('Thousand', `Data Series`)) %>%
rename("Country" = "ImportValue")
Import <- full_join(Region, Country, by = join_by(`Data Series`, `MonthYear`))
Import <- gather(Import , "Level", "ImportValue", -`Data Series`, -`MonthYear`)
# =================== Export =================== #
Region <- T2 %>%
filter(grepl('Million', `Data Series`)) %>%
rename("Region" = "ExportValue")
Country <- T2 %>%
filter(grepl('Thousand', `Data Series`)) %>%
rename("Country" = "ExportValue")
Export <- full_join(Region, Country, by = join_by(`Data Series`, `MonthYear`))
Export <- gather(Export , "Level", "ExportValue", -`Data Series`, -`MonthYear`)Filter date and rename column
Import <- Import %>%
filter(`MonthYear`> as.Date("2015-12-01")) %>%
rename(`Country` = `Data Series`)
Export <- Export %>%
filter(`MonthYear`> as.Date("2015-12-01")) %>%
rename(`Country` = `Data Series`)Merge Import and Export into one table
wide <- full_join(Import, Export, by = join_by(`Country`, `MonthYear`,`Level`))
wide <- wide %>%
mutate("Diff" = ImportValue-ExportValue) %>%
mutate("Total" = ImportValue+ExportValue)
wide$`Country` <- str_replace(wide$`Country`, "Mainland China", "China")
wide$`Country` <- str_replace_all(wide$`Country`, "\\(|Thousand Dollars|\\)", "")
wide$`Country` <- str_replace_all(wide$`Country`, "\\(|Million Dollars|\\)", "")
long <- gather(wide , "Type", "Value", -`Country`, -`MonthYear`,-`Level`)2.1 Table Wide : Merchandise Imports/Export By Region/Market, Monthly
| Country | MonthYear | Level | ImportValue | ExportValue | Diff | Total |
|---|---|---|---|---|---|---|
| America | 2022-12-01 | Region | 6901.5 | 6217.5 | 684.0 | 13119.0 |
| Asia | 2022-12-01 | Region | 33611.7 | 39734.8 | -6123.1 | 73346.5 |
| Europe | 2022-12-01 | Region | 7541.8 | 4924.4 | 2617.4 | 12466.2 |
| Oceania | 2022-12-01 | Region | 1399.9 | 3034.8 | -1634.9 | 4434.7 |
| Africa | 2022-12-01 | Region | 414.9 | 1088.6 | -673.7 | 1503.5 |
2.2 Table Long : Merchandise Imports/Export By Region/Market, Monthly
| Country | MonthYear | Level | Type | Value |
|---|---|---|---|---|
| America | 2022-12-01 | Region | ImportValue | 6901.5 |
| Asia | 2022-12-01 | Region | ImportValue | 33611.7 |
| Europe | 2022-12-01 | Region | ImportValue | 7541.8 |
| Oceania | 2022-12-01 | Region | ImportValue | 1399.9 |
| Africa | 2022-12-01 | Region | ImportValue | 414.9 |
3 Visualizations
3.1 Scatter plot Dashboard
Code
hline <- function(color = "steelblue") {
list(
type = "line",
yref = "paper",
xref = "paper",
y0 = 0, y1 = 1,
x0 = 0, x1 = 1,
line = list(color = color, dash="dot")
)
}
fig <- wide %>%
plot_ly(
x = ~`ImportValue`,
y = ~`ExportValue`,
color = ~`Country`,
frame = ~as.character(`MonthYear`, format = "%Y-%m"),
size = ~`Total`,
sizes = c(10,1000),
text= ~paste("Country:",`Country`,
"\nImport Value:", `ImportValue`, " Thousand Dollars",
"\nExport Value:", `ExportValue`, " Thousand Dollars",
"\nMonth Year:", `MonthYear`),
hoverinfo = "text",
type = 'scatter',
mode = 'markers'
)
# Setup layout
fig <- fig %>%
layout(title = list(text="Import - Export"),
hoverlabel = list(align = "left"),
shapes = hline(),
legend = list(orientation = "h", y = 1, x = 0),
showlegend = FALSE,
xaxis = list(title="Import Value", range = list(0, 10000000)),
yaxis = list(title="Export Value", range = list(0, 10000000)),
width=650,
height=650
)
# Setup Animation
fig <- fig %>%
animation_opts(
500, easing = "linear", redraw = FALSE
)
# Animation slider
fig <- fig %>% animation_slider(
currentvalue = list(prefix = "MONTH-YEAR :", font = list(color="red"))
)
figInsights
- Mainland China is range with a High Import - High Export spectrum. From 2020 to 2021, there was a significant increase in value, particularly export value.
- Taiwan import rate has continuously grown but we can observed significant jump of import value in 2021 after COVID.
3.2 Slope Graph
Code
library(ggplot2)
vline <- function(x = 0, color = "steelblue") {
list(
type = "line",
y0 = 0, y1 = 1,
yref = "paper",
x0 = x, x1 = x,
line = list(color = color, dash="dot")
)
}
# Prepare data for Import
slopeimport <- wide %>%
subset(Level == "Region") %>%
mutate(month = month(MonthYear)) %>%
mutate(year = year(MonthYear)) %>%
drop_na() %>%
group_by(Country, year) %>%
summarise(sumyear = sum(ImportValue)) %>%
mutate(Year = factor(year)) %>%
arrange(`sumyear`)
# Create slope plot for import
p <- newggslopegraph(dataframe = slopeimport,
Times = `Year`,
Measurement = `sumyear`,
Grouping = `Country`,
Title = "Total Import per Year by Region",
SubTitle = "2016-2022",
Caption = NULL)
p + annotate("rect",
xmin = "2020",
xmax = "2022",
ymin = -1,
ymax = 600000,
alpha = .1,
fill = "yellow")
# Prepare data for Export
slopeexport <- wide %>%
subset(Level == "Region") %>%
mutate(month = month(MonthYear)) %>%
mutate(year = year(MonthYear)) %>%
drop_na() %>%
group_by(Country, year) %>%
summarise(sumyear = sum(ExportValue)) %>%
mutate(Year = factor(year)) %>%
arrange(`sumyear`)
# Create slope plot for export
newggslopegraph(dataframe = slopeexport,
Times = `Year`,
Measurement = `sumyear`,
Grouping = `Country`,
Title = "Total Export per Year by Region",
SubTitle = "2016-2022",
Caption = NULL) +
annotate("rect", xmin = "2020", xmax = "2022", ymin = -1, ymax = 600000,
alpha = .1, fill = "yellow")

3.3 Line Plot
braid <- wide %>%
select(`Country`, `MonthYear`, `Level`, `ImportValue`, `ExportValue`) %>%
drop_na() %>%
subset(Country == "China ")
ribbon <- gather(braid , "Type", "Value", -`Country`, -`MonthYear`, -`Level`)ggplot() +
geom_line(aes(`MonthYear`, `Value`, linetype = `Type`), data = ribbon) +
geom_braid(aes(`MonthYear`,
ymin = `ImportValue`,
ymax = `ExportValue`,
fill = `ImportValue`>`ExportValue`),
data = braid,
alpha = 0.6,
method = 'line') +
guides(linetype = "none", fill = "none")
3.4 Heat Map
library(ggplot2)
library(hrbrthemes)
heatmap <- wide %>%
drop_na()
p <- ggplot(heatmap , aes(MonthYear, Country, fill= Diff)) +
scale_fill_distiller(palette = "RdPu") +
theme_ipsum() +
geom_tile() +
theme(axis.text.x = element_text(angle = 90, vjust = 1.5, hjust=1.5))+
theme(axis.text.y = element_text(size = 5, vjust = 1.5, hjust=1.5))+
labs(title = "Trade balance", x="", y="")
ggplotly(p, tooltip= "text")3.5 Cycle Plot
Step 1: Deriving month and year fields
cycle <- wide
cycle$month <- month(cycle$`MonthYear`)
cycle$year <- year(cycle$`MonthYear`)Step 2: Extracting the target country
cycle <- cycle %>%
filter(`Country`== "America ")Step 3: Computing year average import by month
x <- cycle %>%
select(Country, month, year, Total) %>%
group_by(month) %>%
summarise(avg = mean(Total))
hline.data <- cycle %>%
group_by(`month`) %>%
mutate(avgvalue = mean(`Total`))
# group_by(town) %>%
# # Calculate housing age
# mutate(housing_age = 2022 - lease_commence_date) %>%
# summarise(average_price = mean(resale_price_kSGD), average_housing_age = mean(housing_age), average_area = mean(floor_area_sqm))
# hline.data <- cycle %>%
# group_by(month) %>%
# summarise(avgvalue = mean(`ImportValue`))Step 4: Plotting the cycle plot
Code
ggplot() +
geom_line(data=cycle,
aes(x=year,
y=Diff,
group=month),
colour="black") +
geom_hline(aes(yintercept=avgvalue),
data=hline.data,
linetype=6,
colour="red",
linewidth=0.5) +
facet_grid(~month) +
labs(axis.text.x = element_blank(),
title = "xxxxxxxxxxxxxxxxxxxxxxxxx") +
xlab("") +
ylab("Import Value") +
theme(plot.title = element_text(size=22),
axis.text.x = element_text(size = 10, angle = 90),
axis.text.y = element_text(size = 10),
strip.text = element_text(size = 10))
Pay Attention
Using callouts is an effective way to highlight content that your reader give special consideration or attention.
4 Interactive Dashboard
Code
# Prepare data for dashboard
line <- long %>%
subset(Type == "ImportValue"|Type == "ExportValue")
# Building interactive filters
d <- highlight_key(line)
filter_tools <- htmltools::div(
filter_select(id = "filter",
label = "Select Country",
sharedData = d,
group = ~Country,
multiple=FALSE),
filter_slider(id = "period",
label = "Select period",
sharedData = d,
column = ~year(MonthYear),
width = "100%"),
filter_slider(id = "value",
label = "Select Value",
sharedData = d,
column = ~Value,
width = "100%"),
filter_checkbox(id = "variable",
label = "Select variable",
sharedData = d,
group = ~Type,
inline = FALSE))
vline <- function(x = 0, color = "steelblue") {
list(
type = "line",
y0 = 0, y1 = 1,
yref = "paper",
x0 = x, x1 = x,
line = list(color = color, dash="dot")
)
}
# plotting interactive scatter plot using plotly
p <- plot_ly(data=d,
type= "scatter",
mode= "line",
x= ~MonthYear,
y= ~Value,
color= ~Type,
colors= "Accent",
# fill = 'tonexty',
text= ~paste("Country:",`Country`,
"\nMonth Year:", `MonthYear`,
"\nType:",`Type`)) %>%
layout(title = list(text="<b>Import/Export trend</b>"),
hoverlabel = list(align = "left"),
legend = list(orientation = "h", y = 1, x = 0),
shapes = vline("2020"),
xaxis = list(title="Month Year"),
yaxis = list(title="Value"))
gg <- highlight(p, "plotly_selected")
# Using crosstalk bscols to put all 3 elements (filter, scatter plot, datatable) together.
crosstalk::bscols(filter_tools,gg,DT::datatable(d, class= "display",
filter=list(position="top", clear=FALSE),
options=list(pageLength = 10,scrollY = TRUE,
iDisplayLength = 25)),
widths = c(4, 8, 12),
annotations = list(caption = "Data from Department of Statistics, Singapore (DOS)"))function filter_default() {
document.getElementById("filter").getElementsByClassName("selectized")
[0].selectize.setValue("China (Thousand Dollars)", false);
}
window.onload = filter_default;Code
# Building interactive filters
# d <- highlight_key(ribbon)
# # d2 <- highlight_key(braid)
#
# filter_tools <- htmltools::div(
# filter_select(id = "country",
# label = "Select Country",
# sharedData = d,
# group = ~Country,
# multiple=FALSE),
#
# filter_slider(id = "period",
# label = "Select period",
# sharedData = d,
# column = ~year(MonthYear),
# width = "100%"))
#
# # plotting interactive scatter plot using plotly
# p <- ggplot() +
# geom_line(aes(`MonthYear`, `Value`, linetype = `Type`), data = ribbon)
# # +
# # geom_braid(aes(`MonthYear`,
# # ymin = `ImportValue`,
# # ymax = `ExportValue`,
# # fill = `ImportValue`>`ExportValue`),
# # data = braid, alpha = 0.6) +
# # guides(linetype = "none", fill = "none")
#
#
# gg <- highlight(p, "plotly_selected")
#
# # Using crosstalk bscols to put all 3 elements (filter, scatter plot, datatable) together.
# crosstalk::bscols(filter_tools, gg, widths = c(4, 8))